# load r data file from the data interview section, let me know if this doesn't works
load(".RData")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(readxl)
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.3.3
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
A production quality data visualization (meaning it has a legible title and other labels, has no extraneous visual information geoms are clearly seen.): 60 points
# Identify top 3
top_railroads <- nc_top_company_blockings %>%
slice_max(Count, n = 2) %>%
pull(Railroad)
# Group everything else as "Other Railroads"
nc_pie_data <- nc_top_company_blockings %>%
mutate(RailroadGroup = ifelse(Railroad %in% top_railroads, Railroad, "Other Railroads")) %>%
group_by(RailroadGroup) %>%
summarise(Count = sum(Count)) %>%
mutate(perc = round(Count / sum(Count) * 100, 1),
label = paste0(RailroadGroup, "\n", perc, "%"))
# Plot
ggplot(nc_pie_data, aes(x = "", y = Count, fill = RailroadGroup)) +
geom_col(width = 1, color = "white") +
coord_polar(theta = "y") +
theme_void() +
geom_text(aes(label = label),
position = position_stack(vjust = 0.5),
size = 4) +
labs(title = "NC Train Crossing Complaints by Railroad",
fill = "Railroad") +
theme(legend.position = "none") # hides redundant legend
Railroad company CSX is responsible for nearly 70% of all blocked train crossings in North Carolina. Norfolk Southern Railway lags far behind in second, responsible for 26% of complaints.
An exploratory choropleth map related to your final story: 180 points ***
# I'm going to use a blocked train crossings map that I made for PLAN372
# https://data.transportation.gov/Railroads/Crossing-Inventory-Data-Form-71-Current/m2f8-22s6/explore (data source)
locations_crossing_IDS <- read_csv("data/crossing_inventory_may_2025.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 437935 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Crossing ID, State Code, County Name, City Code, City Name, City D...
## dbl (7): Block Number, Crossing Purpose Code, Crossing Type Code, Crossing ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# now that I've read in the data I've narrowed down the columns I want
locations_crossing_IDS <- locations_crossing_IDS %>%
select(`Crossing ID`,`Crossing Type`, `Crossing Purpose`, Longitude, Latitude)
# now we're going to join the blocked crossing dataset to locations_crossing_ID by ID, we need to match by crossing ID, also include crossing type, crossing purpose, longitude, latitude, state code
blocked_crossings_location <- blocked_crossings %>%
left_join(locations_crossing_IDS, by = c("Crossing ID" = "Crossing ID"))
## Warning in left_join(., locations_crossing_IDS, by = c(`Crossing ID` = "Crossing ID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 33722 of `x` matches multiple rows in `y`.
## ℹ Row 296931 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# for mapping purposes let's make a dataframe only of crossings that have complaints
crossings_with_complaints <- blocked_crossings_location %>%
group_by(`Crossing ID`, Longitude, Latitude, `Crossing Type`, `Crossing Purpose`, State) %>%
summarise(complaint_count = n(), .groups = "drop") %>%
filter(!is.na(Longitude) & !is.na(Latitude))
# let's do this just in NC for mapping ease
crossings_with_complaints_nc <- crossings_with_complaints %>%
filter(State == "NC")
st_crs(crossings_with_complaints_nc)
## Coordinate Reference System: NA
crossings_nc_sf <- st_as_sf(crossings_with_complaints_nc,
coords = c("Longitude", "Latitude"),
crs = 4326)
# need to add all of the counties and a base map that shows population per county, from the census
# counties first
counties_nc <-
tigris::counties(state = "North Carolina", class = "sf") %>%
mutate(GEOID = as.character(GEOID)) %>%
select(GEOID, NAME)
## Retrieving data for the year 2022
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
### THIS IS FROM PLAN372
## download the national walkability index from the epa
# used chatgpt to help me read in this folder since I've never seen data like this
# path to the .gdb folder (no trailing slash!)
gdb_path <- "data/WalkabilityIndex/Natl_WI.gdb"
st_layers(gdb_path)
## Driver: OpenFileGDB
## Available layers:
## layer_name geometry_type features fields
## 1 NationalWalkabilityIndex Multi Polygon 220739 29
## crs_name
## 1 USA_Contiguous_Albers_Equal_Area_Conic_USGS_version
walkability <- st_read(gdb_path, layer = "NationalWalkabilityIndex")
## Reading layer `NationalWalkabilityIndex' from data source
## `/Users/annafetter/Documents/MEJO570SP25/final/data/WalkabilityIndex/Natl_WI.gdb'
## using driver `OpenFileGDB'
## Simple feature collection with 220739 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -10434580 ymin: -83867.97 xmax: 3407868 ymax: 6755033
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic_USGS_version
nc_walk <- walkability %>%
filter(STATEFP == "37") # NC FIPS code
nc_walk_map <- ggplot(nc_walk) +
geom_sf(aes(fill = NatWalkInd), color = NA) +
scale_fill_viridis_c(option = "plasma") +
labs(title = "Walkability Index – North Carolina",
fill = "Index") +
theme_minimal()
nc_walk <- st_transform(nc_walk, crs = 4326)
crossings_nc_sf <- st_as_sf(crossings_with_complaints_nc,
coords = c("Longitude", "Latitude"),
crs = 4326)
nc_crossings_map <- ggplot() +
geom_sf(data = nc_walk, aes(fill = NatWalkInd), color = NA, alpha = 0.5) +
geom_sf(data = crossings_nc_sf,
aes(size = complaint_count),
color = "#FF3B3B",
alpha = 0.8) +
scale_fill_viridis_c(option = "magma", direction = -1, begin = 0.2, end = 0.8) +
labs(title = "NC Blocked Train Crossings & Walkability Scores",
fill = "Walkability Index",
size = "Complaint Count") +
theme_minimal() +
# I had some weird overlapping with my labels, chatGPT suggested this
theme(legend.box = "vertical",
legend.spacing.y = unit(0.5, "cm"))
# legend still looks werid
nc_crossings_map
Over 400 civilian complaints of blocked crossings have been reported in
North Carolina since 2022, with clusters of complaints in Rocky Mount
and Charlotte. In Charlotte, many of these blocked crossings are
reported in otherwise walkable areas.
An exploratory point-in-polygon map related to your final story: 30 60 points.
charlotte_walk <- nc_walk %>%
# charlotte metro area code
filter(CSA == "172")
# filter just to points that are in Charlotte metro
charlotte_crossings_sf <- st_intersection(crossings_nc_sf, charlotte_walk)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# only keep complaints that are in Charlotte metro
charlotte_crossings_map <- ggplot() +
geom_sf(data = charlotte_walk, aes(fill = NatWalkInd), color = NA, alpha = 0.5) +
geom_sf(data = charlotte_crossings_sf,
aes(size = complaint_count),
color = "#FF3B3B",
alpha = 0.8) +
scale_fill_viridis_c(option = "magma", direction = -1, begin = 0.2, end = 0.8) +
labs(title = "Charlotte Train Crossings & Walkability Scores",
fill = "Walkability Index",
size = "Complaint Count") +
theme_minimal()
charlotte_crossings_map
Blocked crossing complaints in the Charlotte metro area are concentrated
in the city’s urban core. These reports often overlap with neighborhoods
that rank high on walkability, potentially disrupting pedestrian
access.
An exploratory interactive JavaScript choropleth map related to your final story: 30 EXTRA CREDIT points
# make a leaflet that let's the user click on how many complaints are at each crossing in Charlotte, and click to see walkability
library(leaflet)
# Create color palette for walkability
pal_walk <- colorNumeric(palette = "magma",
domain = charlotte_walk$NatWalkInd,
reverse = TRUE)
leaflet(data = charlotte_crossings_sf) %>%
# Walkability layer
addPolygons(
data = charlotte_walk,
fillColor = ~pal_walk(NatWalkInd),
fillOpacity = 0.4,
color = "white",
weight = 1,
popup = ~paste0("Walkability Index: ", NatWalkInd)
) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircleMarkers(
radius = 6,
fillColor = "#FF5733",
fillOpacity = 0.8,
stroke = FALSE,
popup = ~paste0("<b>Crossing ID:</b> ", `Crossing.ID`, "<br>",
"<b>Complaints:</b> ", complaint_count)
)